***********************************************************************
*This file builds vars on completions by field of study from the IPEDS*
***********************************************************************

* Load the reshaped completions-by-major file.
* `use` takes -clear- (not -replace-) to discard whatever data are currently in memory.
use "$clean_data_education/completionsMajorReshaped.dta", clear

* Attach location information; only unitid-yearGroup pairs present in both files are kept.
merge 1:m unitid yearGroup using "$clean_data_education/locations.dta", keep(3) nogenerate
	* Drop institutions flagged as non-mainland (original note: Alaska and so on) and
	* 1990-group rows without a commuting zone (original note: some early observations have
	* enrollment details but no institution details; the note says 1991 while the code
	* tests yearGroup == 1990 -- TODO confirm which is intended).
	drop if nonMainland == 1 | (yearGroup == 1990 & mi(czone))

* Attach time-invariant institution controls; institutions missing from the controls file are dropped.
merge m:1 unitid using "$clean_data_education/institutionControls.dta", keep(3) keepusing(forProfit public freshmenFullTimeAvg studentsAvg sector) nogenerate 
	
	* Sample restrictions
	drop if yearGroup == 1990 // comment this if you need the pct analysis
	drop if forProfit == 1
	drop if freshmenFullTimeAvg < 50 // missing values compare as larger than any number, so they are NOT dropped here
	drop if periods < 3
	keep if (sector >= 4 & sector <= 6)  // 2Y institutions
	
* quartile groups from field of study to occupation data
* Each local holds the numeric suffixes of the total# variables belonging to one group.
* Row sums use the documented egen function rowtotal() (rsum() is its legacy, undocumented name);
* missing components count as zero.
local codesFieldsQ1 13 17 18 16 19 20 23 25 26 41 42 44 51
local codesFieldsQ2 5 9 11 22 27 28 29 30 45 52 6 7 8
local codesFieldsQ3 3 4 12 14 24 38 40 43 54
local codesFieldsQ4 1 2 10 15 31 39 46 47 48 49 50

foreach grp in FieldsQ1 FieldsQ2 FieldsQ3 FieldsQ4 {
	* build the variable list total# for this group
	local members
	foreach code of local codes`grp' {
		local members `members' total`code'
	}
	egen total`grp' = rowtotal(`members')
}

* halves: M1 = Q1 + Q2, M2 = Q3 + Q4
egen totalFieldsM1 = rowtotal(totalFieldsQ1 totalFieldsQ2)
egen totalFieldsM2 = rowtotal(totalFieldsQ3 totalFieldsQ4)

* new classification
local codesNatural 1 2 3 26 27 40
local codesEngCom 4 10 11 14 15 41
local codesSocialS 5 9 13 19 20 42 45
local codesBusEco 6 7 8 52
local codesHumArts 12 16 22 23 24 25 30 31 38 39 50 54
local codesManuf 46 47 48 49
local codesHealthS 17 18 51
local codesPubMil 28 29 43 44

local majorTotals
foreach grp in Natural EngCom SocialS BusEco HumArts Manuf HealthS PubMil {
	local members
	foreach code of local codes`grp' {
		local members `members' total`code'
	}
	egen total`grp' = rowtotal(`members')
	local majorTotals `majorTotals' total`grp'
}

* grand total = sum of the eight groups of the new classification
egen totalMajors = rowtotal(`majorTotals')

* Keep identifiers, every total* variable and the multiplier ourRatio used just below
keep unitid yearGroup czone fips total* ourRatio 

*** collapse 
* Scale each total* variable by ourRatio, then add the scaled values up
* within czone-yearGroup cells.
unab scaledVars : total*
foreach v of local scaledVars {
	replace `v' = ourRatio * `v'
}


collapse (sum) total*, by(czone yearGroup)

* merge populations data
* Every czone-yearGroup cell in memory must find a population record (the assertion fails otherwise).
* Population-only cells are discarded, since we do not have all the CZs in IPEDS data.
merge 1:1 czone yearGroup using "$clean_data_lmarket/census_populationDataCZ.dta", assert(using match) keep(match) nogenerate


* Cells with zero completions would only yield missing shares (division by zero); remove them up front
drop if totalMajors == 0

* For each field group: share of all completions, and completions relative to ipums_pop
foreach grp in FieldsQ1 FieldsQ2 FieldsQ3 FieldsQ4 FieldsM1 FieldsM2 Natural EngCom SocialS BusEco HumArts Manuf HealthS PubMil {
	generate share`grp' = total`grp' / totalMajors
	generate share`grp'Pop = total`grp' / ipums_pop
}

* Total completions relative to ipums_pop
generate shareCompletions = totalMajors / ipums_pop


* delta 
* d is a consecutive period counter (1..4) over the year groups, so that the lag
* operator can later step from one year group to the previous one.
sort czone yearGroup
generate d = .
local period = 0
foreach grpYear in 1994 2000 2007 2014 {
	local ++period
	replace d = `period' if yearGroup == `grpYear'
}
drop if yearGroup == 1990

	* Levels file used to match deltas with ratios by year (so that the beginning-of-period
	* levels sit on the same row as the delta). It is held in a Stata tempfile, which is
	* removed automatically, so no scratch file is left in the output folder if the script stops early.
	tempfile startLevels
	preserve
	drop total* 
	* year is the key shared with the delta rows built below
	gen year = 1990 if yearGroup == 1994
	replace year = 2000 if yearGroup == 2000
	replace year = 2008 if yearGroup == 2007
	drop if mi(year) // yearGroup 2014 is never a beginning of period
	drop yearGroup d
	save "`startLevels'"
	restore


* Change in every share* variable relative to the previous period (d - 1) of the same czone, times 100
xtset czone d
foreach i of var share* {
	gen d_`i' = (`i' - L.`i')*100
}


xtset, clear
keep yearGroup czone d_* // the levels are re-attached below

* generate time variable needed by Ben's code
* each delta row is keyed by the year assigned to the previous year group in the levels file
gen year = 1990 if yearGroup == 2000
replace year = 2000 if yearGroup == 2007
replace year = 2008 if yearGroup == 2014
drop if mi(year)

* drop rows in which every delta is missing: with the missing option, rowtotal()
* returns missing only when all of its arguments are missing
egen check = rowtotal(d_*), missing
drop if mi(check)
drop check yearGroup

* keep(3): levels without a matching delta row (_merge == 2: only one ratio -> no delta is possible) are dropped
merge 1:1 czone year using "`startLevels'", keep(3) nogenerate

save "$final_data_outcomes/IPEDS_MajorsCZ.dta", replace
